Advanced Lane Finding Project

The goals / steps of this project are the following:

  • Compute the camera calibration matrix and distortion coefficients given a set of chessboard images.
  • Apply a distortion correction to raw images.
  • Use color transforms, gradients, etc., to create a thresholded binary image.
  • Apply a perspective transform to rectify binary image ("birds-eye view").
  • Detect lane pixels and fit to find the lane boundary.
  • Determine the curvature of the lane and vehicle position with respect to center.
  • Warp the detected lane boundaries back onto the original image.
  • Output visual display of the lane boundaries and numerical estimation of lane curvature and vehicle position.

0. Imports and Helpers

In [1]:
import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt

from collections import namedtuple

%matplotlib inline
In [2]:
# Real-world scale assumptions used to convert pixel measurements to meters.
LANE_WIDTH_METERS = 3.7   # assumed lane width in meters
LANE_LENGTH_METERS = 30.0  # assumed road length spanned by the warped image — TODO confirm

# Lightweight 2-D helper tuples used throughout the notebook
Point = namedtuple('Point', ['x', 'y'])
Size = namedtuple('Size', ['width', 'height'])

def np_zip(a, b):
    """Pair up two numpy arrays element-wise, like the built-in zip.

    Returns an array whose rows are the (a[i], b[i]) pairs."""

    stacked = np.vstack((a, b))
    return stacked.T

1. Camera Calibration

Camera calibration only needs to occur once, but I have put it in a function to encourage good coding behavior

In [3]:
def find_checkerboard_corners(img, nx, ny):
    """Locate the internal corners in an image of a checkerboard.

    Parameters:
        • img - image of a checkerboard
        • nx - number of internal checkerboard corners in the x direction
        • ny - number of internal checkerboard corners in the y direction

    Returns:
        The detected corners, or an empty list when detection fails"""

    # OpenCV loads images as BGR (not RGB), so convert accordingly
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # detect the internal chessboard corner grid
    found, corners = cv2.findChessboardCorners(grayscale, (nx, ny), None)

    return corners if found else []


def camera_calibration(img_files, nx=9, ny=6):
    """Calibrate the camera based on several checkerboard images.

    Parameters:
        • img_files - list of checkerboard image files, ideally taken from different angles and distances
        • nx - number of internal checkerboard corners in the x direction for all images
        • ny - number of internal checkerboard corners in the y direction for all images

    Returns:
        Tuple of the camera matrix and the distortion coefficients

    Raises:
        ValueError - if no readable images are supplied"""

    # prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
    objp = np.zeros((ny * nx, 3), np.float32)
    objp[:, :2] = np.mgrid[0:nx, 0:ny].T.reshape(-1, 2)

    # Arrays to store object points and image points from all the images.
    objpoints = []  # 3d points in real world space
    imgpoints = []  # 2d points in image plane
    img_size = None

    for img_file in img_files:
        img = cv2.imread(img_file)

        # capture the image size from the first image rather than relying on
        # the loop variable after the loop (which raised NameError for an
        # empty file list); all calibration images share one size
        if img_size is None:
            img_size = (img.shape[1], img.shape[0])

        # find the checkerboard corners
        corners = find_checkerboard_corners(img, nx, ny)

        # If found, add object points and image points
        if len(corners):
            objpoints.append(objp)
            imgpoints.append(corners)

    if img_size is None:
        raise ValueError('camera_calibration requires at least one readable image')

    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, img_size, None, None)

    return (mtx, dist)
In [4]:
# Calibrate once using every checkerboard image in camera_cal/
img_files = glob.glob('camera_cal/*.jpg')
mtx, dist = camera_calibration(img_files)

# Use one sample image to visualize the detected corners
img_file = img_files[13]

img = cv2.imread(img_file)
cv2.imwrite('output_images/checkerboard-00-orig.jpg', img)

plt.title('Original Image of a Checkerboard')
# matplotlib expects RGB while OpenCV loads BGR
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

corners = find_checkerboard_corners(img, 9, 6)
# NOTE: drawChessboardCorners modifies img in place
cv2.drawChessboardCorners(img, (9, 6), corners, True)
cv2.imwrite('output_images/checkerboard-01-corners.jpg', img)

fig = plt.figure()
plt.title('Detected Corners on a Checkerboard')
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB));

2. Undistort

This will need to be run on all images produced by the camera that was calibrated in the previous step

In [5]:
def undistort(img, mtx, dist):
    """Undistort an image using the camera matrix and the distortion coefficients
    returned by the `camera_calibration` function.

    Parameters:
        • img - image that needs to be undistorted
        • mtx - camera matrix
        • dist - distortion coefficients

    Returns:
        An undistorted image"""

    # the previously computed `img_size` local was never used; cv2.undistort
    # needs only the image and the calibration parameters
    return cv2.undistort(img, mtx, dist, None, mtx)
In [6]:
# Demonstrate undistortion on the same sample checkerboard image
img = cv2.imread(img_files[13])

plt.title('Original Image of a Checkerboard')
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

# apply the calibration computed above and save the result
img = undistort(img, mtx, dist)
cv2.imwrite('output_images/checkerboard-02-undistorted.jpg', img)

fig = plt.figure()
plt.title("Undistorted Image")
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB));
In [7]:
# Undistort every test image, saving the before/after pair to output_images/
undistort_imgs = []
# strip the directory and the '.jpg' extension to get a base output name
# NOTE(review): '/'-splitting assumes POSIX paths — os.path would be portable
file_bases = [f.split('/')[-1][:-4] for f in glob.glob('test_images/*.jpg')]

for filename, file_base in zip(glob.glob('test_images/*.jpg'), file_bases):
    
    orig_img = cv2.imread(filename)
    cv2.imwrite('output_images/{}-00-orig.jpg'.format(file_base), orig_img)
    
    # remove lens distortion using the calibration computed earlier
    undistort_img = undistort(orig_img, mtx, dist)
    cv2.imwrite('output_images/{}-01-undist.jpg'.format(file_base), undistort_img)

    # keep the undistorted images for the later pipeline stages
    undistort_imgs.append(undistort_img)
    
    # show original and undistorted side by side (convert BGR -> RGB for matplotlib)
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
    ax1.set_title('Original Image')
    ax1.imshow(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))

    ax2.set_title('Undistorted Image')
    ax2.imshow(cv2.cvtColor(undistort_img, cv2.COLOR_BGR2RGB))

3. Thresholding functions

Various thresholding functions determine lane boundaries. To make the algorithm more robust, these will be combined together.

In [8]:
def absolute_sobel_threshold(img, orient='x', sobel_kernel=3, thresh=(0, 255)):

    """Creates a binary image based on the gradient in one direction of the input image.

    This function uses the Sobel operator to calculate the derivative.

    Parameters:
        • img - input image
        • orient - direction to take the derivative ('x' or 'y')
        • sobel_kernel - an odd number to define the size of the Sobel kernel
        • thresh - tuple of low and high thresholds to be included in the output binary

    Returns:
        A single channel binary image of detected edges in the original image

    Raises:
        ValueError - if orient is not 'x' or 'y'"""

    # previously an invalid orient silently produced dx=dy=0 and an obscure
    # cv2 error; fail fast with a clear message instead
    if orient not in ('x', 'y'):
        raise ValueError("orient must be 'x' or 'y'")

    # 1) Convert to grayscale (openCV reads images as BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # 2) Take the derivative in the requested direction
    dx, dy = (1, 0) if orient == 'x' else (0, 1)
    sobel = cv2.Sobel(gray, cv2.CV_64F, dx, dy, ksize=sobel_kernel)

    # 3) Take the absolute value of the derivative
    abs_sobel = np.absolute(sobel)

    # 4) Scale to 8-bit (0 - 255) so thresholds work on a known range
    scaled_sobel = np.uint8(255 * abs_sobel / np.max(abs_sobel))

    # 5) Create a mask of 1's where the scaled gradient meets the thresholds
    grad_binary = np.zeros_like(scaled_sobel)
    grad_binary[(scaled_sobel >= thresh[0]) & (scaled_sobel <= thresh[1])] = 1

    # 6) Return this mask as the binary output image
    return grad_binary
In [9]:
def magnitude_threshold(img, sobel_kernel=3, mag_thresh=(0, 255)):

    """Creates a binary image based on the overall magnitude of the gradient of the
    input image.

    This function uses the Sobel operator to calculate the derivatives.

    Parameters:
        • img - input image
        • sobel_kernel - an odd number to define the size of the Sobel kernel
        • mag_thresh - tuple of low and high thresholds to be included in the output binary

    Returns:
        A single channel binary image of detected edges in the original image"""

    # grayscale conversion (openCV reads images as BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # derivatives in both directions
    dx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
    dy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=sobel_kernel)

    # gradient magnitude, scaled to the 8-bit 0-255 range
    magnitude = (dx ** 2 + dy ** 2) ** 0.5
    scaled = np.uint8(255 * magnitude / np.max(magnitude))

    # keep pixels whose scaled magnitude falls inside the thresholds
    mag_binary = np.zeros_like(scaled)
    mag_binary[(scaled >= mag_thresh[0]) & (scaled <= mag_thresh[1])] = 1

    return mag_binary
In [10]:
def direction_threshold(img, sobel_kernel=3, thresh=(0, np.pi/2)):

    """Creates a binary image based on the gradient direction of the input image.

    This function uses the Sobel operator to calculate the derivatives.

    Parameters:
        • img - input image
        • sobel_kernel - an odd number to define the size of the Sobel kernel
        • thresh - tuple of low and high direction thresholds, in radians

    Returns:
        A single channel binary image of detected edges in the original image"""

    # grayscale conversion (openCV reads images as BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # absolute derivatives in both directions
    abs_dx = np.absolute(cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=sobel_kernel))
    abs_dy = np.absolute(cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=sobel_kernel))

    # gradient direction; absolute values fold it into [0, pi/2]
    direction = np.arctan2(abs_dy, abs_dx)

    # keep pixels whose direction falls inside the thresholds; match the
    # input image's dtype so this composes with the other binary masks
    dir_binary = np.zeros_like(direction, dtype=img.dtype)
    dir_binary[(direction >= thresh[0]) & (direction <= thresh[1])] = 1

    return dir_binary
In [11]:
def saturation_threshold(img, thresh=(0, 255)):

    """Creates a binary image based on the saturation channel of the input image
    converted to HLS color space.

    Parameters:
        • img - input image
        • thresh - tuple of low (exclusive) and high (inclusive) thresholds

    Returns:
        A thresholded single channel binary image of the original image"""

    # NOTE(review): the pipeline feeds BGR images while this flag assumes RGB;
    # the S channel is presumably unaffected by the R/B swap — confirm intent
    hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)

    # threshold the saturation channel: strict lower bound, inclusive upper
    saturation = hls[:, :, 2]
    sat_binary = np.zeros_like(saturation)
    sat_binary[(saturation > thresh[0]) & (saturation <= thresh[1])] = 1

    return sat_binary
In [12]:
def region_of_interest(img):
    """
    Applies an image mask: only keeps the region of the image inside the
    hard-coded trapezoidal polygon where the lane is expected; the rest of
    the image is set to black.

    Parameters:
        • img - single or multi channel image

    Returns:
        The masked image
    """
    vertices = np.array([[[20, 720], [468, 446], [812, 446], [1260, 720]]], dtype=np.int32)

    # mask with the same shape/dtype as the input; the previous version
    # stacked the mask to 3x the input's channel count (breaking multi-channel
    # inputs) and then only ever used mask[:, :, 0]
    mask = np.zeros_like(img)

    # fill color must match the input's channel count
    if img.ndim > 2:
        ignore_mask_color = (255,) * img.shape[2]
    else:
        ignore_mask_color = 255

    # fill the pixels inside the polygon defined by "vertices"
    cv2.fillPoly(mask, vertices, ignore_mask_color)

    # keep the image only where the mask pixels are nonzero
    return cv2.bitwise_and(img, mask)
In [13]:
def threshold(img):

    """Creates a binary image by combining several thresholding techniques.

    Parameters:
        • img - input image

    Returns:
        A binary image where the lane markers are clearly visible"""

    kernel = 5

    # gradient-based thresholds
    grad_x = absolute_sobel_threshold(img, orient='x', sobel_kernel=kernel, thresh=(20, 100))
    grad_y = absolute_sobel_threshold(img, orient='y', sobel_kernel=kernel, thresh=(20, 100))
    magnitude = magnitude_threshold(img, sobel_kernel=kernel, mag_thresh=(20, 100))
    direction = direction_threshold(img, sobel_kernel=kernel, thresh=(0.7, 1.3))

    # color-based threshold
    saturation = saturation_threshold(img, thresh=(90, 255))

    # keep a pixel when both directional gradients agree, or when magnitude
    # and direction agree, or when the saturation test fires
    keep = ((grad_x == 1) & (grad_y == 1)) | ((magnitude == 1) & (direction == 1)) | (saturation == 1)

    combined = np.zeros_like(direction)
    combined[keep] = 1

    return combined
In [14]:
# Threshold every undistorted test image and save the result
thresholded_imgs = []

for undistort_img, file_base in zip(undistort_imgs, file_bases):

    thresholded_img = threshold(undistort_img)
    # the binary holds 0/1 values; scale to 0/255 and stack to 3 channels for saving
    cv2.imwrite('output_images/{}-02-thresh.jpg'.format(file_base), np.dstack((thresholded_img, thresholded_img, thresholded_img)) * 255)
    
    # keep the binaries for the perspective-transform stage
    thresholded_imgs.append(thresholded_img)
    
    # show undistorted and thresholded side by side
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    ax1.set_title('Undistorted Image')
    ax1.imshow(cv2.cvtColor(undistort_img, cv2.COLOR_BGR2RGB))

    ax2.set_title('Thresholded Image')
    ax2.imshow(thresholded_img, cmap='gray')

4. Perspective Transformation

These functions will transform images from perspective view to birds-eye view

In [15]:
def gen_transform_matrices():

    """Generates the transform matrix and the inverse transform matrix to warp an
    image from perspective view to birds-eye view.

    Returns:
        A tuple of the transform matrix and the inverse transform matrix"""

    # hand-picked correspondence: a trapezoid on the road surface (src)
    # maps to a rectangle in the birds-eye view (dst)
    src = np.float32([[140, 720], [581, 450], [699, 450], [1140, 720]])
    dst = np.float32([[140, 720], [140, 0], [1140, 0], [1140, 720]])

    # forward and inverse transforms come from swapping the point sets
    forward = cv2.getPerspectiveTransform(src, dst)
    inverse = cv2.getPerspectiveTransform(dst, src)

    return (forward, inverse)
In [16]:
def transform_birdseye(img):

    """Applies a perspective warp that produces a birds-eye view of the lane.

    Parameters:
        • img - input image of the lane

    Returns:
        A birds-eye view image of the lane"""

    # only the forward matrix is needed here
    warp_matrix, _ = gen_transform_matrices()

    # cv2 expects (width, height)
    output_size = (img.shape[1], img.shape[0])

    return cv2.warpPerspective(img, warp_matrix, output_size, flags=cv2.INTER_LINEAR)
In [17]:
# Warp every thresholded test image to a birds-eye view and save the result
birdseye_imgs = []

for thresholded_img, file_base in zip(thresholded_imgs, file_bases):

    birdseye_img = transform_birdseye(thresholded_img)
    # scale the 0/1 binary to 0/255 and stack to 3 channels for saving
    cv2.imwrite('output_images/{}-03-warp.jpg'.format(file_base), np.dstack((birdseye_img, birdseye_img, birdseye_img)) * 255)
    
    # keep the warped binaries for the lane-detection stage
    birdseye_imgs.append(birdseye_img)
    
    # show thresholded and warped side by side
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10))
    ax1.set_title('Thresholded Image')
    ax1.imshow(thresholded_img, cmap='gray')

    ax2.set_title('Birds-Eye Warped Image')
    ax2.imshow(birdseye_img, cmap='gray')

5. Detect Lane Markings

Detect the pixels associated with the lane markings

In [18]:
# Histogram of the lower half of the (last) birds-eye image: the column sums
# peak at the x positions of the two lane markings.
# np.int was removed in modern NumPy; use floor division / plain ints instead.
hist = np.sum(birdseye_img[birdseye_img.shape[0] // 2:, :], axis=0)
center_x = hist.shape[0] // 2
left_peak = np.where(hist == np.max(hist[:center_x]))[0][0]
right_peak = np.where(hist[center_x:] == np.max(hist[center_x:]))[0][0] + center_x

print("{} - {} - {}".format(left_peak, center_x, right_peak))
plt.plot(hist);
302 - 640 - 1195
In [19]:
def find_windows_0(img):
    """Locate lane-line search windows using a histogram plus sliding windows.

    Parameters:
        • img - binary birds-eye view image of the lane

    Returns:
        Tuple of (left_windows, right_windows), where each window is
        [(min_x, min_y), (max_x, max_y)]"""

    # histogram of the lower half of the image: the biggest peak in each half
    # should correspond to the starting x position of a lane marking
    # (np.int was removed in modern NumPy; use floor division instead)
    hist = np.sum(img[img.shape[0] // 2:, :], axis=0)

    center_x = hist.shape[0] // 2
    left_x = np.where(hist == np.max(hist[:center_x]))[0][0]
    right_x = np.where(hist[center_x:] == np.max(hist[center_x:]))[0][0] + center_x

    nwindows = 9
    window_width = 200

    half_width = window_width // 2
    window_height = img.shape[0] // nwindows

    # minimum number of pixels found to recenter the next window
    min_pixels = 50

    # x and y positions of all nonzero pixels in the image
    nonzero = img.nonzero()
    nonzero_y = np.array(nonzero[0])
    nonzero_x = np.array(nonzero[1])

    left_windows = []
    right_windows = []

    for i in range(nwindows):
        # window boundaries in y, working from the bottom of the image upward
        min_y = max(img.shape[0] - (i + 1) * window_height, 0)
        max_y = img.shape[0] - i * window_height

        # window boundaries in x, clamped to the image
        min_x_left = max(left_x - half_width, 0)
        max_x_left = min(left_x + half_width, img.shape[1])

        min_x_right = max(right_x - half_width, 0)
        max_x_right = min(right_x + half_width, img.shape[1])

        left_windows.append([(min_x_left, min_y), (max_x_left, max_y)])
        right_windows.append([(min_x_right, min_y), (max_x_right, max_y)])

        # nonzero pixels falling inside each window
        left_idxs = ((nonzero_y >= min_y) & (nonzero_y < max_y) & (nonzero_x >= min_x_left) & (nonzero_x < max_x_left)).nonzero()[0]
        right_idxs = ((nonzero_y >= min_y) & (nonzero_y < max_y) & (nonzero_x >= min_x_right) & (nonzero_x < max_x_right)).nonzero()[0]

        # when enough pixels were found, recenter the next window on their mean x
        if len(left_idxs) > min_pixels:
            left_x = int(np.mean(nonzero_x[left_idxs]))
        if len(right_idxs) > min_pixels:
            right_x = int(np.mean(nonzero_x[right_idxs]))

    return (left_windows, right_windows)
In [20]:
def find_windows_1(img):
    """Locate lane-line search windows by convolving a box filter across
    horizontal slices of the image.

    Parameters:
        • img - binary birds-eye view image of the lane

    Returns:
        Tuple of (left_windows, right_windows), where each window is
        ((min_x, min_y), (max_x, max_y))"""

    left_windows = []
    right_windows = []

    nwindows = 12
    window_width = 200
    margin = int(window_width / 2)

    # np.int was removed in modern NumPy; plain int() is equivalent here
    window_height = int(img.shape[0] / nwindows)
    half_width = int(window_width / 2)

    # convolution reference is the right edge of the window, hence this offset
    offset = window_width / 2

    # minimum response (pixels per window area) needed to recenter a window
    min_pixels = int(margin * 2 * window_height * 0.01)

    window = np.ones(window_width)

    # initial centers: strongest box-filter response in each half of the lower image
    l_sum = np.sum(img[int(img.shape[0] / 2):, :int(img.shape[1] / 2)], axis=0)
    l_center = int(np.argmax(np.convolve(window, l_sum)) - window_width / 2)
    r_sum = np.sum(img[int(img.shape[0] / 2):, int(img.shape[1] / 2):], axis=0)
    r_center = int(np.argmax(np.convolve(window, r_sum)) - window_width / 2 + int(img.shape[1] / 2))

    min_y = max(img.shape[0] - window_height, 0)
    max_y = img.shape[0]

    left_windows.append(((l_center - half_width, min_y), (l_center + half_width, max_y)))
    right_windows.append(((r_center - half_width, min_y), (r_center + half_width, max_y)))

    for i in range(1, nwindows):
        min_y = max(img.shape[0] - (i + 1) * window_height, 0)
        max_y = img.shape[0] - i * window_height

        # convolve the box window across this horizontal slice of the image
        image_layer = np.sum(img[min_y:max_y, :], axis=0)
        conv_signal = np.convolve(window, image_layer)

        # best left centroid near the previous center; the offset compensates
        # for the convolution reference being the window's right side
        l_min_index = int(max(l_center + offset - margin, 0))
        l_max_index = int(min(l_center + offset + margin, img.shape[1]))

        if np.sum(conv_signal[l_min_index:l_max_index]) / window_width > min_pixels:
            l_center = int(np.argmax(conv_signal[l_min_index:l_max_index]) + l_min_index - offset)

        # best right centroid near the previous center
        r_min_index = int(max(r_center + offset - margin, 0))
        r_max_index = int(min(r_center + offset + margin, img.shape[1]))

        if np.sum(conv_signal[r_min_index:r_max_index]) / window_width > min_pixels:
            r_center = int(np.argmax(conv_signal[r_min_index:r_max_index]) + r_min_index - offset)

        # record the window found for this layer (clamped at the image edges)
        left_windows.append(((max(l_center - half_width, 0), min_y), (l_center + half_width, max_y)))
        right_windows.append(((r_center - half_width, min_y), (min(r_center + half_width, img.shape[1]), max_y)))

    return left_windows, right_windows
In [21]:
def detect_lane_markings(img):

    """Detect the lane markings by using peaks in a histogram to guess the 'x'
    location of the lane markings and a sliding window starting at that 'x'
    position as it moves in the 'y' direction.

    Parameter:
        • img - binary birds-eye view image of the lane

    Returns:
        A tuple of:
            • list of left lane pixels
            • list of right lane pixels
            • list of left windows
            • list of right windows"""

    left_windows, right_windows = find_windows_0(img)

    # x and y positions of all nonzero pixels in the image
    nonzero = img.nonzero()
    nonzero_ys = np.array(nonzero[0])
    nonzero_xs = np.array(nonzero[1])

    def collect_pixel_idxs(windows):
        # indices (into the nonzero arrays) of pixels inside any of the windows
        per_window = []
        for (min_x, min_y), (max_x, max_y) in windows:
            inside = (nonzero_ys >= min_y) & (nonzero_ys < max_y) & (nonzero_xs >= min_x) & (nonzero_xs < max_x)
            per_window.append(inside.nonzero()[0])
        return np.concatenate(per_window)

    left_idxs = collect_pixel_idxs(left_windows)
    right_idxs = collect_pixel_idxs(right_windows)

    # pair each pixel's y with its x
    left_pixels = np_zip(nonzero_ys[left_idxs], nonzero_xs[left_idxs])
    right_pixels = np_zip(nonzero_ys[right_idxs], nonzero_xs[right_idxs])

    return (left_pixels, right_pixels, left_windows, right_windows)
In [22]:
def fit_curve(points, order=2):

    """Fits a polynomial curve to the points.

    Parameters:
        • points - Nx2 array; column 0 is the independent variable,
          column 1 the dependent variable
        • order - order of the polynomial to fit

    Returns:
        Coefficients of the fitted polynomial, highest power first"""

    independent = points[:, 0]
    dependent = points[:, 1]
    return np.polyfit(independent, dependent, order)
In [23]:
def draw_windows(img, left, right, color=[0, 255, 0], thickness=2):

    """Draws the lane-search windows on the image.

    Parameters:
        • img - input three channel birds-eye view image
        • left - list of windows used to discover the left lane
        • right - list of windows used to discover the right lane
        • color - tuple of B, G, R values
        • thickness - thickness of the rectangle lines

    Returns:
        A copy of the image with the windows drawn on it"""

    annotated = np.copy(img)

    # each window is a ((min_x, min_y), (max_x, max_y)) corner pair
    for left_win, right_win in zip(left, right):
        cv2.rectangle(annotated, left_win[0], left_win[1], color, thickness)
        cv2.rectangle(annotated, right_win[0], right_win[1], color, thickness)

    return annotated
In [24]:
def gen_curve_points(img_shape, poly):

    """Generates the x and y pixel coordinates of a polynomial curve, one
    point per image row.

    Parameters:
        • img_shape - shape of the image (height first)
        • poly - polynomial coefficients, highest power first

    Returns:
        A tuple of (x coordinates, y coordinates) for the curve"""

    # one y value per image row; the previous version mistakenly read the
    # global `img` instead of the img_shape parameter
    plot_y = np.arange(img_shape[0])

    # polynomial powers, highest first, matching np.polyfit's ordering
    powers = np.arange(len(poly) - 1, -1, -1).reshape(1, len(poly))

    # evaluate the polynomial at every y and round toward integer pixels
    # (np.int was removed in modern NumPy; plain int is equivalent)
    plot_x = np.sum((plot_y.reshape(img_shape[0], 1) ** powers) * poly, axis=1).astype(int)

    return (plot_x, plot_y)
In [25]:
def draw_fitted_curves(img, left, right, color=[0, 255, 255], thickness=5):

    """Draws the fitted curves for the lanes on the image.

    Parameters:
        • img - input three channel birds-eye view image
        • left - parameters that describe the curve for the left lane
        • right - parameters that describe the curve for the right lane
        • color - tuple of B, G, R values
        • thickness - approximate curve thickness in pixels

    Returns:
        A copy of the image with the best fit curves for the lanes"""

    out = np.copy(img)

    for poly in (left, right):

        plot_x, plot_y = gen_curve_points(img.shape, poly)

        # paint `thickness` adjacent columns around each curve point; an
        # IndexError means the curve leaves the image and the rest of that
        # curve is skipped
        try:
            # np.int was removed in modern NumPy; plain int is equivalent
            offsets_x = np.arange(thickness) - np.ceil(thickness / 2 - 1)
            for i in offsets_x.astype(int):
                out[plot_y, plot_x + i] = color
        except IndexError:
            pass

    return out
    
In [26]:
# Run lane detection on every birds-eye test image and visualize the result
detected_imgs = []

for birdseye_img, file_base in zip(birdseye_imgs, file_bases):

    left_points, right_points, left_windows, right_windows = detect_lane_markings(birdseye_img)

    # fit a second-order polynomial to each set of detected lane pixels
    left_poly = fit_curve(left_points)
    right_poly = fit_curve(right_points)
    
    # scale the 0/1 binary to a visible 3-channel image
    detected_img = np.dstack((birdseye_img, birdseye_img, birdseye_img)) * 255
    
    # color the detected pixels (BGR): left lane red, right lane blue
    detected_img[left_points[:, 0], left_points[:, 1]] = [0, 0, 255]
    detected_img[right_points[:, 0], right_points[:, 1]] = [255, 0, 0]
    
    # overlay the search windows and the fitted curves
    detected_img = draw_windows(detected_img, left_windows, right_windows)
    detected_img = draw_fitted_curves(detected_img, left_poly, right_poly)
    
    cv2.imwrite('output_images/{}-04-detect.jpg'.format(file_base), detected_img)
    
    detected_imgs.append(detected_img)
    
    # show the warped binary next to the annotated detection
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    ax1.set_title('Birds-Eye Warped Image')
    ax1.imshow(birdseye_img, cmap='gray')

    ax2.set_title('Detected Lanes')
    ax2.imshow(cv2.cvtColor(detected_img, cv2.COLOR_BGR2RGB))
    

6. Calculate Curvature and Position

Calculate the curvature of the road and the position of the car in the lane

In [27]:
def calc_lane_curvature(img_shape, left_poly, right_poly):

    """Calculate the curvature of the lane.

    Parameters:
        • img_shape - shape of the image
        • left_poly - coefficients describing the polynomial of the left lane marker
        • right_poly - coefficients describing the polynomial of the right lane marker

    Returns:
        The radius of the curvature of the lane in meters (mean of both markers)"""

    # best-fit curve points for both lane markings, in pixel space
    left_x, left_y = gen_curve_points(img_shape, left_poly)
    right_x, right_y = gen_curve_points(img_shape, right_poly)

    # lane width in pixels, measured at the bottom of the image
    lane_width_pixels = right_x[-1] - left_x[-1]

    # evaluate curvature at the bottom of the image (closest to the car)
    y_eval = img_shape[0] - 1

    # meters-per-pixel conversion factors for x and y
    mpp_x = LANE_WIDTH_METERS / lane_width_pixels
    mpp_y = LANE_LENGTH_METERS / img_shape[0]

    # refit each curve in metric space
    metric_polys = [np.polyfit(ys * mpp_y, xs * mpp_x, 2)
                    for xs, ys in ((left_x, left_y), (right_x, right_y))]

    # standard radius-of-curvature formula for x = f(y)
    radii = [((1 + (2 * p[0] * y_eval * mpp_y + p[1]) ** 2) ** 1.5) / np.absolute(2 * p[0])
             for p in metric_polys]

    return sum(radii) / len(radii)
In [28]:
def calc_lane_center_offset(img_shape, left_poly, right_poly):

    """Calculate how much the car is offset from the center of the lane,
    assuming the camera is at the center of the car.

    Parameters:
        • img_shape - shape of the image
        • left_poly - coefficients describing the polynomial of the left lane marker
        • right_poly - coefficients describing the polynomial of the right lane marker

    Returns:
        Offset from the center of the lane in meters"""

    # best-fit curve x coordinates for both lane markings (pixel space)
    left_x, _ = gen_curve_points(img_shape, left_poly)
    right_x, _ = gen_curve_points(img_shape, right_poly)

    # lane width in pixels, measured at the bottom of the image
    lane_width_pixels = right_x[-1] - left_x[-1]

    # known real-world lane width gives the meters-per-pixel factor
    meters_per_pixel = LANE_WIDTH_METERS / lane_width_pixels

    # image center minus lane center, in pixels, at the bottom row
    offset_pixels = img_shape[1] / 2 - lane_width_pixels / 2 - left_x[-1]

    return offset_pixels * meters_per_pixel
In [29]:
# Compute curvature radius and center offset for every test image
positionals = []

for birdseye_img, file_base in zip(birdseye_imgs, file_bases):

    left_points, right_points, left_windows, right_windows = detect_lane_markings(birdseye_img)

    # fit a second-order polynomial to each set of lane pixels
    left_poly = fit_curve(left_points)
    right_poly = fit_curve(right_points)
        
    radius = calc_lane_curvature(birdseye_img.shape, left_poly, right_poly)
    offset = calc_lane_center_offset(birdseye_img.shape, left_poly, right_poly)
    
    # store (radius, offset) pairs for the overlay step below
    positionals.append((radius, offset))
        

7. Warp Lane Markings to Perspective View

Use the inverse transform matrix to warp the lane markings back onto the original perspective image

In [30]:
def transform_lane_to_perspective(img_shape, left_poly, right_poly, M_inv, color=[0, 255, 0]):

    """Draws a polygon to represent the lane and warps it to overlay the
    perspective image.

    Parameters:
        • img_shape - shape of the image
        • left_poly - coefficients describing the polynomial of the left lane marker
        • right_poly - coefficients describing the polynomial of the right lane marker
        • M_inv - matrix describing the transformation from birds-eye to perspective view
        • color - color of the polygon to be drawn

    Returns:
        An image of the polygon of the lane, ready to be overlayed on the actual image"""

    # generate the points of the best fit lane marking curves
    left_x, left_y = gen_curve_points(img_shape, left_poly)
    right_x, right_y = gen_curve_points(img_shape, right_poly)

    # blank three channel image to draw the polygon on
    zeros = np.zeros(img_shape[0:2], dtype=np.uint8)
    birdseye = np.dstack((zeros, zeros, zeros))

    # left edge top-to-bottom, then right edge reversed, so the polygon
    # outline is traced in one continuous loop for cv2.fillPoly
    points_left = np.array([np.transpose(np.vstack([left_x, left_y]))])
    points_right = np.array([np.flipud(np.transpose(np.vstack([right_x, right_y])))])
    points = np.hstack((points_left, points_right))

    # fill the lane polygon (np.int was removed in modern NumPy; int is equivalent)
    cv2.fillPoly(birdseye, [points.astype(int)], color)

    # warp the polygon back to original image space using the inverse matrix
    perspective = cv2.warpPerspective(birdseye, M_inv, (img_shape[1], img_shape[0]))

    return perspective
In [31]:
def overlay_detected_lane(img, left_poly, right_poly, M_inv, color=[0, 255, 0]):

    """Helper function to overlay the detected lane onto an image.

    Parameters:
        • img - input image to get the overlay
        • left_poly - coefficients describing the polynomial of the left lane marker
        • right_poly - coefficients describing the polynomial of the right lane marker
        • M_inv - matrix describing the transformation from birds-eye to perspective view
        • color - color of the polygon to be drawn

    Returns:
        An image with the polygon representing the detected lane overlayed on it"""

    # render the lane polygon in perspective view
    polygon_img = transform_lane_to_perspective(img.shape, left_poly, right_poly, M_inv, color)

    # blend: full-weight original plus 30% of the lane polygon
    return cv2.addWeighted(img, 1, polygon_img, 0.3, 0)
In [32]:
def overlay_lane_info(img, radius, offset):
    """Helper function to overlay the lane information onto image

    Parameters:
        • img - input image of the lane
        • radius - radius of the lane curvature (meters)
        • offset - offset of the car from the center of the lane (meters);
          negative values are treated as left of center

    Returns:
        An image with the info about the lane overlayed"""

    font = cv2.FONT_HERSHEY_SIMPLEX

    # conditional expression instead of the fragile `cond and a or b` idiom
    # (that idiom silently breaks if the "true" value is ever falsy)
    side = 'left' if offset < 0.0 else 'right'

    text1 = 'Radius of curvature: {:,.0f}m'.format(radius)
    text2 = 'Vehicle is {:.2f}m {} of center'.format(abs(offset), side)

    # draw on a copy so the caller's image is not mutated
    overlay_img = np.copy(img)

    cv2.putText(overlay_img, text1, (10, 60), font, 2.0, (255, 255, 255), 2, cv2.LINE_AA)
    cv2.putText(overlay_img, text2, (10, 130), font, 2.0, (255, 255, 255), 2, cv2.LINE_AA)

    return overlay_img
In [33]:
# Render and save the final lane overlay for each precomputed test image, and
# display the undistorted input next to the overlaid result.
for birdseye_img, undistort_img, positional, file_base in zip(birdseye_imgs, undistort_imgs, positionals, file_bases):

    # locate lane-marker pixels (and the sliding-search windows) in the birds-eye view
    left_points, right_points, left_windows, right_windows = detect_lane_markings(birdseye_img)

    # fit a polynomial through each marker's detected pixels
    left_poly = fit_curve(left_points)
    right_poly = fit_curve(right_points)
    
    # M is unused below; only M_inv is needed to warp the lane back to camera view
    M, M_inv = gen_transform_matrices()
    
    overlay_img = overlay_detected_lane(undistort_img, left_poly, right_poly, M_inv)
    
    # positional holds (curvature radius, center offset) computed in an earlier cell
    radius, offset = positional

    overlay_img = overlay_lane_info(overlay_img, radius, offset)
    
    cv2.imwrite('output_images/{}-05-overlay.jpg'.format(file_base), overlay_img)
        
    # side-by-side comparison; cv2 images are BGR, so convert for matplotlib
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    ax1.set_title('Undistorted Image')
    ax1.imshow(cv2.cvtColor(undistort_img, cv2.COLOR_BGR2RGB))

    ax2.set_title('Overlay Lane Image')
    ax2.imshow(cv2.cvtColor(overlay_img, cv2.COLOR_BGR2RGB))

Pipeline

Put all the pieces (after the camera calibration) together into a single pipeline function

In [34]:
def _binary_to_bgr(binary_img):
    """Convert a single-channel 0/1 binary image to a 3-channel 0/255 image
    suitable for cv2.imwrite (mirrors the `* 255` scaling the pipeline uses)."""
    return np.dstack((binary_img, binary_img, binary_img)) * 255


def find_lane(img, mtx=None, dist=None, save_base=None):
    """Find the lane in the image and overlay the detected lane and information
    about the position of the car and the curvature of the lane.

    Parameters:
        • img - input image
        • mtx - camera matrix (undistortion is skipped if mtx or dist is None)
        • dist - distortion coefficients
        • save_base - base filename (and path) to save intermediate images;
          None disables saving

     Returns:
         The input image with the detected lane and information about the lane overlayed"""

    save = save_base is not None

    # work on a copy so the caller's image is never mutated
    overlay_img = np.copy(img)

    if save:
        cv2.imwrite('{}-00-orig.jpg'.format(save_base), overlay_img)

    # undistort only when calibration data was supplied
    if mtx is not None and dist is not None:
        overlay_img = undistort(overlay_img, mtx, dist)

        if save:
            cv2.imwrite('{}-01-undist.jpg'.format(save_base), overlay_img)

    # threshold the image into a binary lane-pixel mask
    working_img = threshold(overlay_img)

    if save:
        cv2.imwrite('{}-02-thresh.jpg'.format(save_base), _binary_to_bgr(working_img))

    # transform the binary mask to a birds-eye view
    working_img = transform_birdseye(working_img)

    if save:
        cv2.imwrite('{}-03-warp.jpg'.format(save_base), _binary_to_bgr(working_img))

    # detect the lane markings (pixels and the sliding-search windows)
    left_points, right_points, left_windows, right_windows = detect_lane_markings(working_img)

    # determine the polynomial coefficients for the two lane markers
    left_poly = fit_curve(left_points)
    right_poly = fit_curve(right_points)

    if save:
        detected_img = _binary_to_bgr(working_img)

        # paint left-marker pixels red and right-marker pixels blue (BGR order)
        detected_img[left_points[:, 0], left_points[:, 1]] = [0, 0, 255]
        detected_img[right_points[:, 0], right_points[:, 1]] = [255, 0, 0]

        detected_img = draw_windows(detected_img, left_windows, right_windows)
        detected_img = draw_fitted_curves(detected_img, left_poly, right_poly)

        cv2.imwrite('{}-04-detect.jpg'.format(save_base), detected_img)

    # only the inverse matrix is needed to project the lane back to camera view
    _, M_inv = gen_transform_matrices()

    # overlay the detected lane onto the (undistorted) image
    overlay_img = overlay_detected_lane(overlay_img, left_poly, right_poly, M_inv)

    # curvature radius and vehicle offset from the lane center
    radius = calc_lane_curvature(working_img.shape, left_poly, right_poly)
    offset = calc_lane_center_offset(working_img.shape, left_poly, right_poly)

    # overlay the lane information (curvature radius and offset from center)
    overlay_img = overlay_lane_info(overlay_img, radius, offset)

    if save:
        cv2.imwrite('{}-05-overlay.jpg'.format(save_base), overlay_img)

    return overlay_img
In [35]:
# Run the full pipeline on every test image and show original vs. overlay.
# NOTE(review): this pairs glob results with file_bases by position, but glob
# does not guarantee a sorted order — verify file_bases was built from the same
# glob call so the pairing lines up. file_base itself is unused in this body.
for filename, file_base in zip(glob.glob('test_images/*.jpg'), file_bases):
    
    orig_img = cv2.imread(filename)
    overlay_img = find_lane(orig_img, mtx, dist)
    
    # cv2 images are BGR; convert for matplotlib display
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    ax1.set_title('Original Image')
    ax1.imshow(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))

    ax2.set_title('Overlay Lane Image')
    ax2.imshow(cv2.cvtColor(overlay_img, cv2.COLOR_BGR2RGB))
In [36]:
import os

# Run the pipeline on the extra challenge frames, saving every intermediate
# stage under an `output/` subdirectory next to the inputs.
for filename in glob.glob('challenge/*.jpg'):

    # build the save path with os.path instead of manual '/' splitting so the
    # code is portable across operating systems
    save_base = os.path.join(os.path.dirname(filename), 'output', os.path.basename(filename))

    orig_img = cv2.imread(filename)
    overlay_img = find_lane(orig_img, mtx, dist, save_base=save_base)

    # cv2 images are BGR; convert for matplotlib display
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    ax1.set_title('Original Image')
    ax1.imshow(cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB))

    ax2.set_title('Overlay Lane Image')
    ax2.imshow(cv2.cvtColor(overlay_img, cv2.COLOR_BGR2RGB))
  

Test on Videos

Let's try the pipeline on a video

In [37]:
# Import everything needed to edit/save/watch video clips
from moviepy.editor import VideoFileClip
from IPython.display import HTML

1. project_video.mp4

In [38]:
# Process the project video: apply find_lane to every frame and write the result.
# NOTE(review): moviepy delivers frames as RGB, while this pipeline was exercised
# on cv2 (BGR) images above — confirm the channel order is handled as intended.
project_output = 'project_output.mp4'
clip1 = VideoFileClip("project_video.mp4")
project_clip = clip1.fl_image(lambda x: find_lane(x, mtx, dist)) #NOTE: this function expects color images!!
# %time is an IPython magic that reports wall/CPU time for the video write
%time project_clip.write_videofile(project_output, audio=False)
[MoviePy] >>>> Building video project_output.mp4
[MoviePy] Writing video project_output.mp4
100%|█████████▉| 1260/1261 [05:53<00:00,  4.00it/s]
[MoviePy] Done.
[MoviePy] >>>> Video ready: project_output.mp4 

CPU times: user 5min 24s, sys: 1min 12s, total: 6min 37s
Wall time: 5min 55s
In [39]:
# Embed the rendered project video in the notebook for inline playback.
HTML("""
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(project_output))
Out[39]:

2. challenge_video.mp4

In [ ]:
# Process the challenge video with the same per-frame pipeline.
# NOTE(review): moviepy delivers frames as RGB, while this pipeline was exercised
# on cv2 (BGR) images above — confirm the channel order is handled as intended.
challenge_output = 'challenge_output.mp4'
clip2 = VideoFileClip("challenge_video.mp4")
challenge_clip = clip2.fl_image(lambda x: find_lane(x, mtx, dist)) #NOTE: this function expects color images!!
# %time is an IPython magic that reports wall/CPU time for the video write
%time challenge_clip.write_videofile(challenge_output, audio=False)
In [ ]:
# Embed the rendered challenge video in the notebook for inline playback.
HTML("""
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(challenge_output))

3. harder_challenge_video.mp4

In [ ]:
# Process the harder challenge video with the same per-frame pipeline.
# NOTE(review): moviepy delivers frames as RGB, while this pipeline was exercised
# on cv2 (BGR) images above — confirm the channel order is handled as intended.
harder_challenge_output = 'harder_challenge_output.mp4'
clip3 = VideoFileClip("harder_challenge_video.mp4")
harder_challenge_clip = clip3.fl_image(lambda x: find_lane(x, mtx, dist)) #NOTE: this function expects color images!!
# %time is an IPython magic that reports wall/CPU time for the video write
%time harder_challenge_clip.write_videofile(harder_challenge_output, audio=False)
In [ ]:
# Embed the rendered harder-challenge video in the notebook for inline playback.
HTML("""
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(harder_challenge_output))